Scripts to run CAVIAR, FINEMAP (version 1.1) and DAP-G
# knitr::opts_chunk$set(error = TRUE) # Knit even with errors
# devtools::install_local("echolocatoR/", force = T)
# library("echolocatoR")
source("echolocatoR/R/MAIN.R")
# MINERVA PATHS TO SUMMARY STATISTICS DATASETS:
Data_dirs <- read.csv("./Data/directories_table.csv")
finemap_results <- list()
coloc_results <- list()Parkinson’s Disease GWAS.
dataset_name <- "Nalls23andMe_2019"
top_SNPs <- Nalls_top_SNPs <- import_topSNPs(
topSS_path = Directory_info(Data_dirs, dataset_name, "topSumStats"),
chrom_col = "CHR", position_col = "BP", snp_col="SNP",
pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats",
group_by_locus = T,
locus_col = "Locus Number",
remove_variants = "rs34637584")
# Manually add new SNP of interest
top_SNPs <- data.table::rbindlist(list(top_SNPs,
data.table::data.table(CHR=14,POS=55360203, SNP="rs3783642",
P=0, Effect=1, Gene="ATG14", Locus=NA),
data.table::data.table(CHR=12,POS=53797236, SNP="rs34656641",
P=0, Effect=1, Gene="SP1", Locus=NA),
data.table::data.table(CHR=5,POS=126181862, SNP="rs959573",
P=0, Effect=1, Gene="LMNB1", Locus=NA),
data.table::data.table(CHR=17,POS=40609405, SNP="rs9897702",
P=0, Effect=1, Gene="ATP6V0A1", Locus=NA)
))
gene_list <- unique(top_SNPs$Gene)
finemap_results[[dataset_name]] <- finemap_gene_list(top_SNPs,
gene_list = unique(c("LRRK2", gene_list))[1:2],
trim_gene_limits = c(T, rep(F,length(gene_list))),
dataset_name = dataset_name,
dataset_type = "GWAS",
query_by ="coordinates",
subset_path = "auto",
finemap_method = c("SUSIE","FINEMAP"),
#,"ABF","COJO"),
force_new_LD = F,
force_new_subset = F,
force_new_finemap = F,
# file_path = Directory_info(dataset_name, "fullSumStats"),
fullSS_path = "./Data/GWAS/Nalls23andMe_2019/nallsEtAl2019_allSamples_allVariants.mod.txt",
chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
pval_col = "p", effect_col = "beta", stderr_col = "se",
freq_col = "freq", MAF_col = "calculate",
A1_col = "A1",
A2_col = "A2",
bp_distance = 500000*2,
download_reference = T,
LD_reference = "1KG_Phase1",
superpopulation = "EUR",
LD_block = F,
min_MAF = 0,
remove_variants = c("rs34637584"),
remove_correlates = c("rs34637584"),
conditioned_snps = "auto", # Use the lead SNP for each gene
n_causal = 5,
remove_tmps = T)## ^^^^^^^^^ Running echolocatoR on: LRRK2 ^^^^^^^^^
[1] “Generating summary table…” [1] “LRRK2 fine-mapped in 19.52 seconds”
## ^^^^^^^^^ Running echolocatoR on: ASXL3 ^^^^^^^^^
[1] “Generating summary table…” [1] “ASXL3 fine-mapped in 3.76 seconds”
#
# dat <- fread("Data/GWAS/Nalls23andMe_2019/ATP6V0A1/FINEMAP/data.snp")
# dat2 <- fread("Data/GWAS/Nalls23andMe_2019/ATP6V0A1/Multi-finemap/Multi-finemap_results.txt")
# dat2$SUSIE.Probability %>% unique()merged_results <- merge_finemapping_results(minimum_support=1,
include_leadSNPs=T,
xlsx_path="./Data/annotated_finemapping_results.xlsx",
from_storage=T,
haploreg_annotation=T,
biomart_annotation=T,
verbose = F)## [1] "++ Submitting chunk 1 / 1"
## Note: zip::zip() is deprecated, please use zip::zipr() instead
NOTE: Must run ‘merge_finemapping_results’ with haploreg_annotation=T for this to work.
# Summarize epigenetics
## Across all datasets and loci
epi_table <- epigenetics_summary(merged_results,
tissue_list = c("BRN","BLD"),
epigenetic_variables = c("Promoter_histone_marks","Enhancer_histone_marks"))## Hits Total_SNPs Percent_Total
## Promoter_histone_marks 215 1297 16.58
## Enhancer_histone_marks 548 1297 42.25
## Within LRRK2
### Enrichment
LRRK2_dat <- data.table::fread("Data/GWAS/Nalls23andMe_2019/LRRK2/Multi-finemap/Multi-finemap_results.txt",
sep="\t", stringsAsFactors = F)
epigenetics_enrichment(snp_list1 = subset(LRRK2_dat, Support >= 1)$SNP %>% unique(),
snp_list2 = LRRK2_dat$SNP %>% unique(),
chisq = T,
fisher = T,
tissue_list = c("BRN","BLD"),
epigenetic_variables = c("Promoter_histone_marks","Enhancer_histone_marks")) ## [1] "Conducting SNP epigenomic annotation enrichment tests..."
## [1] "+++ SNP list 1 :"
## [1] "+ Gathering annotation data from HaploReg..."
## [1] "++ Submitting chunk 1 / 1"
## Hits Total_SNPs Percent_Total
## Promoter_histone_marks 3 6 50
## Enhancer_histone_marks 4 6 66.67
## [1] "+++ SNP list 2 :"
## [1] "+ Gathering annotation data from HaploReg..."
## [1] "++ Submitting chunk 1 / 1"
## Hits Total_SNPs Percent_Total
## Promoter_histone_marks 48 560 8.57
## Enhancer_histone_marks 141 560 25.18
## [1] "++ Testing for enrichment of ' Promoter_histone_marks ' in the tissues ' BRN & BLD '"
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: cont_tab
## X-squared = 7.5689, df = NA, p-value = 0.03198
##
##
## Fisher's Exact Test for Count Data
##
## data: cont_tab
## p-value = 0.03134
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.9106592 28.2085725
## sample estimates:
## odds ratio
## 5.801191
##
## [1] "++ Testing for enrichment of ' Enhancer_histone_marks ' in the tissues ' BRN & BLD '"
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: cont_tab
## X-squared = 2.4016, df = NA, p-value = 0.2429
##
##
## Fisher's Exact Test for Count Data
##
## data: cont_tab
## p-value = 0.1264
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5411402 11.3177243
## sample estimates:
## odds ratio
## 2.643166
potential_missense <- SNPs_by_mutation_type(merged_results, mutation_type="missense_variant")
createDT(potential_missense)## [1] "\n + All 2 models converged upon 1 to 9 SNPs in 56.63 % of loci."
# Distribution of mutation types in Credible Set SNPs
mod <- merged_results %>%
mutate(CredSet.Lead =
ifelse(Support>0,
ifelse(leadSNP==T,"CS & leadSNP", "CS & non-leadSNP"),
ifelse(leadSNP==T, "Non-CS & leadSNP","Non-CS & non-leadSNP")),NA)
ggplot(data=mod, aes(x=consequence_type_tv, fill=CredSet.Lead)) +
geom_bar(aes(y = ..count..), position="dodge") + # (..count..)/sum(..count..)
coord_flip() +
labs(title="Proportions of Mutation Types (Biomart)",
subtitle = "Support = Within Credible Set",
x = "Mutation Type",
y="SNP Count") +
theme(legend.text = element_text(size=8))# snp_list <- subset(merged_results, Gene=="LRRK2")$SNP %>% unique()
snp_list <- c("rs7294519", "rs11175620", "rs76904798")
merged_eQTL_data <- eQTL_boxplots(snp_list,
eQTL_SS_paths = file.path("Data/eQTL/Fairfax_2014",
c("CD14/LRRK2/LRRK2_Fairfax_CD14.txt",
"IFN/LRRK2/LRRK2_Fairfax_IFN.txt",
"LPS2/LRRK2/LRRK2_Fairfax_LPS2.txt",
"LPS24/LRRK2/LRRK2_Fairfax_LPS24.txt")),
expression_paths = file.path("Data/eQTL/Fairfax_2014",
c("CD14/CD14.47231.414.b.txt",
"IFN/IFN.47231.367.b.txt",
"LPS2/LPS2.47231.261.b.txt",
"LPS24/LPS24.47231.322.b.txt")),
genotype_path = "Data/eQTL/Fairfax_2014/geno.subset.txt",
probe_path = "Data/eQTL/Fairfax_2014/gene.ILMN.map",
.fam_path = "Data/eQTL/Fairfax_2014/volunteers_421.fam",
gene = "LRRK2",
show_plot = T,
SS_annotations = F,
interact = F) ## [1] ""
## [1] "+ Processsing Expression data"
## [1] "++ Extracting probe info"
## [1] "++ CD14"
## [1] "++ IFN"
## [1] "++ LPS2"
## [1] "++ LPS24"
## [1] ""
## [1] "+ Processing Summary Stats data"
## [1] ""
## [1] "+ Processing Genotype data"
## Warning in melt.data.table(., geno_subset, id.vars = c("CHR", "SNP",
## "POS", : 'measure.vars' [289, 290, 292, 293, ...] are not all of the same
## type. By order of hierarchy, the molten data value column will be of type
## 'double'. All measure variables not of type 'double' will be coerced too.
## Check DETAILS in ?melt.data.table for more on coercion.
## [1] ""
## [1] "+ Merging Summary Stats, Genotype, and Expression data"
## [1] ""
## [1] "+ Plotting eQTLs"
subpops <- c("CAU","AFA","HIS")
dataset2_pathList <- paste0("./Data/eQTL/MESA_",subpops,"/LRRK2_",subpops,"_MESA.txt")
colocalize_geneList(gene_list = c("LRRK2"),
dataset1_path = "./Data/GWAS/Nalls23andMe_2019/LRRK2/Multi-finemap/Multi-finemap_results.txt",
dataset2_pathList = dataset2_pathList,
dataset1_type = "cc",
dataset2_type = "quant",
shared_MAF = 1,
PP_threshold = .8,
save_results = T,
show_plot = T)
colocalize_geneList <- function(gene_list,
dataset1_path,
dataset2_pathList,
dataset1_type="cc",
dataset2_type = "quant",
shared_MAF=1,
PP_threshold=0.8,
save_results=T,
show_plot=T,
chrom_col = "chr",
position_col = "pos_snps",
snp_col="snps",
pval_col="pvalue",
effect_col="beta",
gene_col="gene_name",
stderr_col = "calculate",
tstat_col = "statistic"){
for(g in gene_list){
cat('\n')
cat("###", g, "\n")
for(d2 in dataset2_pathList){
comparison_name <- paste0(basename(dataset1_path),".vs.",basename(d2))
subset_DT <- extract_SNP_subset(gene = gene,
top_SNPs = top_SNPs,
fullSS_path = fullSS_path,
subset_path = subset_path,
force_new_subset = force_new_subset,
chrom_col = chrom_col,
position_col = position_col,
snp_col = snp_col,
pval_col = pval_col,
effect_col = effect_col,
stderr_col = stderr_col,
gene_col = gene_col,
tstat_col = tstat_col,
MAF_col = MAF_col,
freq_col = freq_col,
A1_col = A1_col,
A2_col = A2_col,
bp_distance = bp_distance,
superpopulation = superpopulation,
min_POS = min_POS,
max_POS = max_POS,
file_sep = file_sep,
query_by = query_by,
probe_path = probe_path
)
coloc_DT <- COLOC(
gene = g,
dataset1_path = dataset1_path,
dataset2_path = d2,
dataset1_type = dataset1_type,
dataset2_type = dataset2_type,
shared_MAF = shared_MAF,
PP_threshold = PP_threshold,
save_results = save_results,
show_plot = show_plot,
)
coloc_DT <- cbind(Comparison = comparison_name)
coloc_results[[dataset_name]]
}
cat("\n")
}
return(coloc_results %>% data.table::rbindlist())
}gather_ff_data <- function(gene_snp_df){
gene_list <- unique(gene_snp_df$Gene)
# Loop through genes
ff_summary <- lapply(gene_list, function(gene, gene_snp_df.=gene_snp_df){
printer("+ Gathering Fairfax eQTLS for LRRK2")
bp <- subset(gene_snp_df., Gene==gene)$POS[1]
# Loop through eQTL conditions
dat <- lapply(c("CD14","IFN","LPS2","LPS24"), function(condition, bp.=bp, gene.=gene){
printer("+",condition)
ff <- data.table::fread(file.path("./Data/eQTL/Fairfax_2014",condition,gene.,
paste0(gene.,"_Fairfax_",condition,".txt")), sep="\t") %>%
subset(POS==bp.) %>%
data.table::as.data.table() %>%
tibble::add_column(Condition=condition, .before =1) %>%
dplyr::rename(PROBE_ID=gene)
return(ff)
}) %>% data.table::rbindlist()
return(dat %>% tibble::add_column(Gene=gene, .before =1))
}) %>% data.table::rbindlist()
return(ff_summary)
}
gene_snp_df <- subset(merged_results, Gene=="LRRK2" & Support==2,
select = c("Gene","SNP","CHR","POS")) %>% unique()
createDT(gather_ff_data(gene_snp_df)) ## [1] "+ Gathering Fairfax eQTLS for LRRK2"
## [1] "+ CD14"
## [1] "+ IFN"
## [1] "+ LPS2"
## [1] "+ LPS24"
gene_list <- c("LRRK2")
subpops <- c("CAU","AFA","HIS")
for(g in gene_list){
for(s in subpops){
dataset_name <- paste0("MESA_",s)
finemap_results[[dataset_name]] <- finemap_gene_list(
top_SNPs = Nalls_top_SNPs,
gene_list=c("LRRK2"),
superpopulation = s,
dataset_name = dataset_name,
finemap_method = c("SUSIE"),#c("SUSIE","ABF","FINEMAP"),
force_new_LD = T,
force_new_finemap = T,
force_new_subset = T,
dataset_type = "eQTL",
query_by = "fullSS",
file_sep = "\t",
# fullSS_path = Directory_info(dataset_name, "fullSumStats"),
fullSS_path = paste0("./Data/eQTL/MESA_",s,"/",g,"_MESA_",s,".txt"),
subset_path = "auto",
chrom_col = "chr", position_col = "pos_snps", snp_col="snps",
pval_col="pvalue", effect_col="beta", gene_col="gene_name",
stderr_col = "calculate", tstat_col = "statistic",
n_causal = 5,
download_reference = T,
remove_tmps = F)
}
} # +++++++++++++++ CD14 Stimulation +++++++++++++++ #
dataset_name <- "Fairfax_2014_CD14"
allResults[[dataset_name]] <- finemap_gene_list( gene_list=c("LRRK2"),
superpopulation = "CAU",
dataset_name = dataset_name,
dataset_type = "eQTL",
top_SNPs = Nalls_top_SNPs,
query_by = "probes",
probe_path = "./Data/eQTL/gene.ILMN.map",
file_sep = " ",
# fullSS_path = Directory_info(Data_dirs, dataset_name, "fullSumStats"),
fullSS_path = "./Data/eQTL/Fairfax_2014_CD14/LRRK2_Fairfax_CD14.txt",
subset_path = "auto",
chrom_col = "CHR", position_col = "POS", snp_col="SNP",
pval_col="p-value", effect_col="beta", gene_col="gene",
stderr_col = "calculate", tstat_col = "t-stat",
vcf_folder = vcf_folder, n_causal = 1,
force_new_subset = force_new_subset,
LD_block = T)## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] susieR_0.8.1.0521 GeneOverlap_1.20.0 haploR_3.0.1
## [4] XGR_1.1.5 dnet_1.1.4 supraHex_1.22.0
## [7] hexbin_1.27.3 igraph_1.2.4.1 coloc_3.2-1
## [10] snpStats_1.34.0 Matrix_1.2-17 survival_2.44-1.1
## [13] biomaRt_2.40.0 BiocManager_1.30.4 tidyr_0.8.3
## [16] gaston_1.5.5 RcppParallel_4.4.3 Rcpp_1.0.1
## [19] curl_3.3 ggrepel_0.8.1 cowplot_0.9.4
## [22] plotly_4.9.0 ggplot2_3.2.0 dplyr_0.8.1
## [25] data.table_1.12.2 readxl_1.3.1 usethis_1.5.0
## [28] devtools_2.0.2 R.utils_2.9.0 R.oo_1.22.0
## [31] R.methodsS3_1.7.1
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.4 RCircos_1.2.1 plyr_1.8.4
## [4] lazyeval_0.2.2 splines_3.6.0 crosstalk_1.0.0
## [7] wavethresh_4.6.8 GenomeInfoDb_1.20.0 ggnetwork_0.5.1
## [10] inline_0.3.15 digest_0.6.19 htmltools_0.3.6
## [13] gdata_2.18.0 magrittr_1.5 memoise_1.1.0
## [16] cluster_2.1.0 openxlsx_4.1.0.1 remotes_2.1.0
## [19] sna_2.4 prettyunits_1.0.2 colorspace_1.4-1
## [22] blob_1.1.1 rrcov_1.4-7 xfun_0.8
## [25] callr_3.2.0 crayon_1.3.4 RCurl_1.95-4.12
## [28] jsonlite_1.6 graph_1.62.0 ape_5.3
## [31] glue_1.3.1 gtable_0.3.0 zlibbioc_1.30.0
## [34] XVector_0.24.0 pkgbuild_1.0.3 Rgraphviz_2.28.0
## [37] BiocGenerics_0.30.0 DEoptimR_1.0-8 scales_1.0.0
## [40] mvtnorm_1.0-11 DBI_1.0.0 viridisLite_0.3.0
## [43] xtable_1.8-4 progress_1.2.2 bit_1.1-14
## [46] stats4_3.6.0 DT_0.7 htmlwidgets_1.3
## [49] httr_1.4.0 gplots_3.0.1.1 RColorBrewer_1.1-2
## [52] pkgconfig_2.0.2 reshape_0.8.8 XML_3.98-1.20
## [55] reshape2_1.4.3 tidyselect_0.2.5 labeling_0.3
## [58] rlang_0.4.0 later_0.8.0 AnnotationDbi_1.46.0
## [61] munsell_0.5.0 cellranger_1.1.0 tools_3.6.0
## [64] cli_1.1.0 RSQLite_2.1.1 statnet.common_4.3.0
## [67] evaluate_0.14 stringr_1.4.0 yaml_2.2.0
## [70] processx_3.3.1 knitr_1.23 bit64_0.9-7
## [73] fs_1.3.1 zip_2.0.2 robustbase_0.93-5
## [76] caTools_1.17.1.2 purrr_0.3.2 nlme_3.1-140
## [79] mime_0.7 leaps_3.0 compiler_3.6.0
## [82] tibble_2.1.3 pcaPP_1.9-73 stringi_1.4.3
## [85] ps_1.3.0 desc_1.2.0 lattice_0.20-38
## [88] pillar_1.4.1 RUnit_0.4.32 bitops_1.0-6
## [91] httpuv_1.5.1 GenomicRanges_1.36.0 R6_2.4.0
## [94] promises_1.0.1 network_1.15 KernSmooth_2.23-15
## [97] IRanges_2.18.1 sessioninfo_1.1.1 MASS_7.3-51.4
## [100] gtools_3.8.1 assertthat_0.2.1 pkgload_1.0.2
## [103] BMA_3.18.9 rprojroot_1.3-2 withr_2.1.2
## [106] S4Vectors_0.22.0 GenomeInfoDbData_1.2.1 parallel_3.6.0
## [109] hms_0.4.2 grid_3.6.0 coda_0.19-2
## [112] rmarkdown_1.13 Biobase_2.44.0 shiny_1.3.2
## [1] "susieR 0.8.1.521"